import sys
import json
import requests

from config import getConfigParams
from requestUrl import getEntryIdByUrl
from pyquery import PyQuery as pq
from logs import saveLog
from formatContent import formatContent, removeSrcParams, filterTag

# Script flow: read the run configuration from argv, fetch the article page
# and its JSON work data from zcool.com.cn, extract title / body / cover /
# author, then log the result and print it to stdout as JSON.

# Seconds to wait for each HTTP request; without a timeout `requests` can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Load configuration parameters from the command line.
configParams = getConfigParams(sys.argv)
url = configParams['real_url']
env = configParams['env']

# Resolve the article (entry) ID from the base URL.
entryId = getEntryIdByUrl(configParams['base_url'])

headers = {
    'Host': 'www.zcool.com.cn',
}

# Fetch the HTML page and wrap it in a PyQuery document.
response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail here on an HTTP error status instead of parsing an error page.
response.raise_for_status()
doc = pq(response.text)

# The objectId is needed to query the work's JSON data.
objectId = doc('#dataInput').attr('data-objid')

workUrl = 'https://www.zcool.com.cn/work/content/show?p=1&objectId=' + str(objectId)
response = requests.get(url=workUrl, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
data = json.loads(response.content.decode())['data']
product = data['product']

# Title
title = product['title']

# Body
# Strip the hidden tags inside the image area.
imageArea = filterTag(doc('.work-show-box'), ['.image-info-icons span'])
# PyQuery's .html() returns None when the selection is empty, so fall back to
# an empty string for each part; otherwise a page that lacks either the text
# section or the image area raises TypeError on concatenation.
# NOTE(review): assumes filterTag returns a PyQuery object, as the original
# code already relied on by calling .html() on it.
articleText = doc('.atricle-text').html() or ''
imageHtml = imageArea.html() or ''
articleContent = articleText + imageHtml

# Cover image
coverUrl = product['cover']

# Author nickname
authorName = product['creatorObj']['username']

# Tag filtering is delegated to formatContent (original note: handled on
# Li Shiquan's side).
content = formatContent(articleContent, env=env)

result = {
    'entry_id': entryId,
    'not_format_content': articleContent,
    'content': content,
    'author_name': authorName,
    'cover_url': coverUrl,
    # This script always emits 0 for the publish time.
    'public_time': 0,
    'title': title,
}
logData = {
    'params': sys.argv,
    'result': result
}
saveLog(json.dumps(logData, ensure_ascii=False))
# Emit the result on stdout.
print(json.dumps(result, ensure_ascii=False))
